Allow exempting domain names from rel="nofollow"
author Aryeh Gregor <simetrical@users.mediawiki.org>
Fri, 23 Jan 2009 18:03:12 +0000 (18:03 +0000)
committer Aryeh Gregor <simetrical@users.mediawiki.org>
Fri, 23 Jan 2009 18:03:12 +0000 (18:03 +0000)
This introduces a new configuration option, $wgNoFollowDomainExceptions.
By default this is an empty array; perhaps it should instead default to null
and be initialized to something extracted from $wgServer.  An appropriate
value for Wikimedia would be something like:

$wgNoFollowDomainExceptions = array( 'wikipedia.org', 'wiktionary.org',
'wikibooks.org', ... );

It's fairly silly that we're nofollowing links to our own sites.  :)

RELEASE-NOTES
includes/DefaultSettings.php
includes/parser/Parser.php

index 742910e..cd6453a 100644 (file)
@@ -23,6 +23,8 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN
 * Added $wgNewPasswordExpiry, to specify an expiry time (in seconds) to
   temporary passwords
 * Added $wgUseTwoButtonsSearchForm to choose the Search form behavior/look
+* Added $wgNoFollowDomainExceptions to allow exempting particular domain names
+  from rel="nofollow" on external links
 
 === New features in 1.15 ===
 
index 9ec4d13..3119e4b 100644 (file)
@@ -3084,6 +3084,19 @@ $wgNoFollowLinks = true;
  */
 $wgNoFollowNsExceptions = array();
 
+/**
+ * If this is set to an array of domains, external links to these domain names
+ * (or any subdomains) will not be set to rel="nofollow" regardless of the
+ * value of $wgNoFollowLinks.  For instance:
+ *
+ * $wgNoFollowDomainExceptions = array( 'en.wikipedia.org', 'wiktionary.org' );
+ *
+ * This would add rel="nofollow" to links to de.wikipedia.org, but not
+ * en.wikipedia.org, wiktionary.org, en.wiktionary.org, us.en.wikipedia.org,
+ * etc.
+ */
+$wgNoFollowDomainExceptions = array();
+
 /**
  * Default robot policy.  The default policy is to encourage indexing and fol-
  * lowing of links.  It may be overridden on a per-namespace and/or per-page
index c37c48e..b8c1325 100644 (file)
@@ -1130,7 +1130,7 @@ class Parser
                if ( $text === false ) {
                        # Not an image, make a link
                        $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', 
-                               $this->getExternalLinkAttribs() );
+                               $this->getExternalLinkAttribs( $url ) );
                        # Register it in the output object...
                        # Replace unnecessary URL escape codes with their equivalent characters
                        $pasteurized = self::replaceUnusualEscapes( $url );
@@ -1410,8 +1410,8 @@ class Parser
                        # This means that users can paste URLs directly into the text
                        # Funny characters like &ouml; aren't valid in URLs anyway
                        # This was changed in August 2004
-                       $s .= $sk->makeExternalLink( $url, $text, false, $linktype, $this->getExternalLinkAttribs() ) 
-                               . $dtrail . $trail;
+                       $s .= $sk->makeExternalLink( $url, $text, false, $linktype,
+                               $this->getExternalLinkAttribs( $url ) ) . $dtrail . $trail;
 
                        # Register link in the output object.
                        # Replace unnecessary URL escape codes with the referenced character
@@ -1424,12 +1424,36 @@ class Parser
                return $s;
        }
 
-       function getExternalLinkAttribs() {
+       /**
+        * Get an associative array of additional HTML attributes appropriate for a
+        * particular external link.  This currently may include rel => nofollow
+        * (depending on configuration, namespace, and the URL's domain) and/or a
+        * target attribute (depending on configuration).
+        *
+        * @param string $url Optional URL, to extract the domain from for rel =>
+        *   nofollow if appropriate
+        * @return array Associative array of HTML attributes
+        */
+       function getExternalLinkAttribs( $url = false ) {
                $attribs = array();
                global $wgNoFollowLinks, $wgNoFollowNsExceptions;
                $ns = $this->mTitle->getNamespace();
                if( $wgNoFollowLinks && !in_array($ns, $wgNoFollowNsExceptions) ) {
                        $attribs['rel'] = 'nofollow';
+
+                       global $wgNoFollowDomainExceptions;
+                       if ( $wgNoFollowDomainExceptions ) {
+                               $bits = wfParseUrl( $url );
+                               if ( is_array( $bits ) && isset( $bits['host'] ) ) {
+                                       foreach ( $wgNoFollowDomainExceptions as $domain ) {
+                                               if( substr( $bits['host'], -strlen( $domain ) )
+                                               == $domain ) {
+                                                       unset( $attribs['rel'] );
+                                                       break;
+                                               }
+                                       }
+                               }
+                       }
                }
                if ( $this->mOptions->getExternalLinkTarget() ) {
                        $attribs['target'] = $this->mOptions->getExternalLinkTarget();